#!/usr/bin/env python
"""Download video meta-info for the video URLs listed in an input file.

The input file is a Firefox bookmarks export (bookmarks.html).
"""
import csv
import re
import sys
import urlparse
from BeautifulSoup import BeautifulSoup
from gdata.youtube.service import YouTubeService

# Parse the Firefox bookmarks export (bookmarks.html).
with open(sys.argv[1]) as bookmark_file:
    soup = BeautifulSoup(bookmark_file.read())

# Collect hrefs of every <a> tag that points at a YouTube watch page.
watch_page_regex = re.compile('http://www.youtube.com/watch')
urls = [anchor['href'] for anchor in soup('a', href=watch_page_regex)]

# Pull the 'v' query parameter (the video id) out of each watch url.
ids = []
for url in urls:
    query = urlparse.urlparse(url).query
    values = urlparse.parse_qs(query).get('v')
    if values:  # skip urls that carry no video id
        ids.append(values[0])
 
# get some statistics for the videos
yt_service = YouTubeService()
#NOTE: you don't need to authenticate for readonly requests
yt_service.ssl = True #NOTE: it works for readonly requests
#yt_service.debug = True # show requests


writer = csv.writer(open(sys.argv[2], 'wb'))
for video_id in ids:
    try:
        entry = yt_service.GetYouTubeVideoEntry(video_id=video_id)
        dir(entry.rating)
        ['FindExtensions', 'ToString', '_AddMembersToElementTree', '_BecomeChildElement', '_ConvertElementAttributeToMember', '_ConvertElementTreeToMember', '_HarvestElementTree', '_ToElementTree', '__class__', '__delattr__', '__dict__', '__doc__', '__format__', '__getattribute__', '__hash__', '__init__', '__module__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__str__', '__subclasshook__', '__weakref__', '_attributes', '_children', '_namespace', '_tag', 'average', 'extension_attributes', 'extension_elements', 'max', 'min', 'num_raters', 'text']
        comments = yt_service.GetYouTubeVideoCommentFeed(video_id=video_id)
    except Exception, e:
        print >>sys.stderr, "Failed to retrieve entry video_id=%s: %s" %(
            video_id, e)
    else:
        title = entry.media.title.text
        print "Title:", title
        view_count = entry.statistics.view_count
        print "View count:", view_count
        favorites = entry.statistics.favorite_count
        print "Favorite Count:", favorites
        comments = comments.total_results.text
        print "Comment Count:", comments
        if entry.rating is None: # skip it
        	average = 0
        else:
        	average = entry.rating.average
        print "Average Rating:", average
        if entry.rating is None: # skip it
            num_raters = 0
        else:
	        num_raters = entry.rating.num_raters
        print "Number of Raters:", num_raters
        author = entry.author[0].name.text
        print "Autor:", author
        published = entry.published.text
        print "Published on:", published
        tags = entry.media.keywords.text
        print "Tags:", tags
        writer.writerow((video_id, title, view_count, favorites, comments, average, num_raters, author, published, tags))
